plotly 시각화 모음

Visualization
Author

신호연

Published

January 6, 2023

Setup

Code
# Install plotly first if needed (run inside a Jupyter notebook):
# %pip install plotly
from plotly.offline import iplot
import plotly.graph_objs as go
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
# Set the default renderer used when figures are displayed
# (two renderer names joined with "+").
pio.renderers.default = "plotly_mimetype+notebook"
Code
import pandas as pd
# Read the CSV that is expected to sit next to the notebook.
timesData = pd.read_csv("./timesData.csv")
# Preview the first five rows.
timesData.head(5)
world_rank university_name country teaching international research citations income total_score num_students student_staff_ratio international_students female_male_ratio year
0 1 Harvard University United States of America 99.7 72.4 98.7 98.8 34.5 96.1 20,152 8.9 25% NaN 2011
1 2 California Institute of Technology United States of America 97.7 54.6 98.0 99.9 83.7 96.0 2,243 6.9 27% 33 : 67 2011
2 3 Massachusetts Institute of Technology United States of America 97.8 82.3 91.4 99.9 87.5 95.6 11,074 9.0 33% 37 : 63 2011
3 4 Stanford University United States of America 98.3 29.5 98.1 99.2 64.3 94.3 15,596 7.8 22% 42 : 58 2011
4 5 Princeton University United States of America 90.9 70.3 95.4 99.9 - 94.2 7,929 8.4 27% 45 : 55 2011
Code
# Summarize the columns: non-null counts, dtypes and memory usage.
timesData.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2603 entries, 0 to 2602
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   world_rank              2603 non-null   object 
 1   university_name         2603 non-null   object 
 2   country                 2603 non-null   object 
 3   teaching                2603 non-null   float64
 4   international           2603 non-null   object 
 5   research                2603 non-null   float64
 6   citations               2603 non-null   float64
 7   income                  2603 non-null   object 
 8   total_score             2603 non-null   object 
 9   num_students            2544 non-null   object 
 10  student_staff_ratio     2544 non-null   float64
 11  international_students  2536 non-null   object 
 12  female_male_ratio       2370 non-null   object 
 13  year                    2603 non-null   int64  
dtypes: float64(4), int64(1), object(9)
memory usage: 284.8+ KB

Line Plot

Code
# Step 1: work on the first 100 rows only.
df = timesData.iloc[:100, :]

# Step 2: a single line trace, world rank on x and citations on y.
trace1 = go.Scatter(x=df["world_rank"], y=df["citations"], mode="lines")
data = [trace1]

# Step 3: layout written as a plain dict.
# Object form of the same thing: go.Layout(title="Citation and Teaching")
# NOTE(review): the title mentions "Teaching" but only citations are plotted - confirm.
layout = {"title": "Citation and Teaching"}

# Step 4: figure written as a plain dict.
# Object form of the same thing: go.Figure(data=data, layout=layout)
fig = {"data": data, "layout": layout}

# Step 5: render the dict figure (a go.Figure would use fig.show() instead).
iplot(fig)

add markers and text

Code
# Step 1: first 100 rows.
df = timesData.iloc[:100]

# Step 2: line trace that also draws a marker at every point;
# `text` attaches the university name to each point.
trace = go.Scatter(
    x=df["world_rank"],
    y=df["citations"],
    mode="lines+markers",
    marker={"color": "rgba(16,112,2,0.8)"},
    text=df["university_name"],
)
data = [trace]

# Step 3: figure title plus an x-axis title and tick length.
layout = go.Layout(
    title="citation",
    xaxis={"title": "World Rank", "ticklen": 5},
)

# Step 4: assemble the figure object.
fig = go.Figure(data=data, layout=layout)

# Step 5: display it.
fig.show()

version2(go.Layout, go.Figure 객체를 사용하는 방식)가 더 나은 것 같다.

Scatter Plot

Scatter plot과 lineplot 둘 다 go.Scatter 객체 사용. 다른점은 mode 설정

Code
# 1. data frame: first 100 rows of the 2014 data.
df2014 = timesData[timesData.year == 2014].iloc[:100, :]

# 2. trace and data: mode="markers" draws points only (no connecting line).
trace = go.Scatter(
    x=df2014.world_rank,  # x coordinates
    y=df2014.citations,  # y coordinates
    mode="markers",
)
data = [trace]

# 3. layout (title typo "Ciation" corrected).
layout = go.Layout(title="Citation vs world rank")

# 4. create figure
fig = go.Figure(data=data, layout=layout)

# 5. plot figure
fig.show()

add markers and text

Code
# Step 1: first 100 rows of the 2014 data.
df2014 = timesData[timesData.year == 2014].iloc[:100, :]

# Step 2: marker-only trace with a per-point text label.
# Transparency is set through the alpha channel of the rgba colour;
# the alternative form is marker = dict(color="green", opacity=0.8).
trace = go.Scatter(
    x=df2014["world_rank"],
    y=df2014["citations"],
    mode="markers",
    marker={"color": "rgba(255,128,2,0.8)"},
    text=df2014["university_name"],
)
data = [trace]

# Step 3: axis titles only, no figure title.
layout = go.Layout(
    xaxis={"title": "World Rank"},
    yaxis={"title": "Citation"},
)

# Step 4: assemble the figure.
fig = go.Figure(data=data, layout=layout)

# Step 5: display it.
fig.show()

Histogram

Code
# Step 1: student/staff ratio values of the 2011 rows.
x2011 = timesData.loc[timesData.year == 2011, "student_staff_ratio"]

# Step 2: a histogram trace only needs the sample values.
trace = go.Histogram(x=x2011)
data = [trace]

# Step 3: figure title.
layout = go.Layout(title="students-staff ratio in 2011")

# Step 4: assemble the figure.
fig = go.Figure(data=data, layout=layout)

# Step 5: display it.
fig.show()

여러 개의 차트 겹쳐 그리기

  • 여기서는 histogram으로 했으나 다른차트들도 가능
Code
# Step 1: student/staff ratio values, one series per year.
x2011 = timesData.loc[timesData.year == 2011, "student_staff_ratio"]
x2012 = timesData.loc[timesData.year == 2012, "student_staff_ratio"]

# Step 2: one histogram trace per year.
# `name` is the label shown in the legend; opacity is set inside `marker`
# here (it could also be passed on the trace itself as opacity=0.7).
trace1 = go.Histogram(
    x=x2011,
    name="2011",
    marker={"color": "rgb(171,50,96)", "opacity": 0.7},
)
trace2 = go.Histogram(
    x=x2012,
    name="2012",
    marker={"color": "blue", "opacity": 0.7},
)
data = [trace1, trace2]

# Step 3: barmode="overlay" draws the traces on top of each other;
# the title is centred with x=0.5.
layout = go.Layout(
    title={"text": "histogram", "x": 0.5},
    xaxis={"title": "students-staff ratio"},
    yaxis={"title": "count"},
    barmode="overlay",
)

# Step 4: assemble and display the figure.
fig = go.Figure(data=data, layout=layout)
fig.show()

참고자료 - Opacity와 alpha? : Opacity는 marker안팎에서 모두 쓰일 수 있으며 alpha는 rgba와 쓸때만 입력,같은 역할을 함. 단,Opacity를 marker의 밖에서 입력하면 trace안에서 밀도를 표현 하지 못함. 다른 trace끼리 겹칠때에는 밀도표현됨.(같은 trace에서만 안됨.)

Code
# 1. data frame: rows of the 2015 ranking.
dataframe = timesData[timesData.year == 2015]

# 2. traces: one line per score column, each plotted against world rank.
# The first column is "research" (as the figure title says); plotting
# world_rank against itself carried no information.
# NOTE(review): timesData.info() reports `income` and `total_score` as object
# dtype (the preview shows "-" entries), so they may need
# pd.to_numeric(..., errors="coerce") to plot on a numeric axis - confirm.
score_columns = ["research", "citations", "income", "total_score"]
data = [
    go.Scatter(
        x=dataframe["world_rank"],
        y=dataframe[column],
        mode="lines",
        name=column,  # legend label, so the four panels can be told apart
    )
    for column in score_columns
]

# 3. figure: make_subplots lays out the 2x2 grid itself, so the hand-written
# xaxis/yaxis domain layout (which was never applied to the figure) is gone.
n_cols = 2
fig = make_subplots(rows=2, cols=n_cols)

# 4. place the traces row by row: (1,1), (1,2), (2,1), (2,2).
for index, trace in enumerate(data):
    row, col = divmod(index, n_cols)
    fig.add_trace(trace, row=row + 1, col=col + 1)

# 5. apply the title (previously defined but never attached) and plot.
fig.update_layout(
    title="Research, citation, income and total score VS World Rank of Universities"
)
fig.show()
Code
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Subplot type of every grid cell, row by row.
subplot_types = [["xy", "polar"], ["domain", "scene"]]
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[[{"type": kind} for kind in row_types] for row_types in subplot_types],
)

# (trace, row, col): each trace goes into the cell whose type matches it.
placements = [
    (go.Bar(y=[2, 3, 1]), 1, 1),
    (go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]), 1, 2),
    (go.Pie(values=[2, 3, 1]), 2, 1),
    (go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"), 2, 2),
]
for subplot_trace, grid_row, grid_col in placements:
    fig.add_trace(subplot_trace, row=grid_row, col=grid_col)

# Overall figure height; the legend is hidden.
fig.update_layout(height=700, showlegend=False)

fig.show()

Vector

Vector field(quiver plot)

사전준비

  • np.meshgrid : x좌표,y좌표를 가지는 벡터를 입력했을때, 두 벡터로 만들 수 있는 격자의 좌표(x,y)를 출력
Code
import numpy as np

# 1-D coordinate vectors: 0.0, 0.2, ..., 1.8 on both axes.
x_coord = np.arange(0, 2, 0.2)
y_coord = np.arange(0, 2, 0.2)

# meshgrid expands the two vectors into two 2-D coordinate matrices.
x, y = np.meshgrid(x_coord, y_coord)

# Shapes of the vectors, then shapes of the grid matrices.
print(x_coord.shape, y_coord.shape)
print(x.shape, y.shape)
(10,) (10,)
(10, 10) (10, 10)
  • 격자(grid,matrix)에 함수 적용하면? => matrix(x,y 각각의 좌표)의 모든 요소에 함수가 적용됨
Code
# A function applied to the grid is applied element-wise, so the result keeps the grid's shape.
print(np.cos(x).shape,np.sin(x).shape)
(10, 10) (10, 10)
  • 배열의 요소 값 차례대로 읽어보기 …

(0,0),(0.2,0),(0.4,0) … (1.8,0) => (0,0.2),(0.2,0.2),(0.4,0.2)…
x좌표 다 읽고 y좌표증가 그 다음 x좌표 다 읽고 y좌표 증가 …

Code
# Display both coordinate matrices: x varies along a row, y varies down a column.
x,y
(array([[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8]]),
 array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
        [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
        [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
        [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
        [1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ],
        [1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2],
        [1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4],
        [1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6],
        [1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8]]))

Gradient Vector Field

\(f(x, y) = xe^{-x^2-y^2}\) 의 그래디언트 \(\nabla f\) 를 벡터장으로 그린다.

Code
#1.prepare data
x,y = np.meshgrid(np.arange(-2,2,0.2),np.arange(-2,2,.25)) #좌표
z = x*np.exp(-x**2-y**2) #함수

dx=0.2;dy=0.25 #dx,dy
v,u = np.gradient(z,dx,dy) #함수의 그레디언트(각좌표에서의 미분계수)
Code
# 2. trace and data => skipped; the figure is created directly in step 3.
# 3. figure: quiver plot from grid positions (x, y) and components (u, v).
fig = ff.create_quiver(
    x,
    y,
    u,
    v,
    scale=0.25,
    arrow_scale=0.4,
    name="quiver",
    line_width=1,
)

# Two extra marker points on the x axis, added on top of the quiver trace.
points = go.Scatter(
    x=[-0.7, 0.75],
    y=[0, 0],
    mode="markers",
    marker={"size": 12},
    name="points",
)
fig.add_trace(points)
fig.show()
Code
# 100 evenly spaced samples on [-1, 1] for each axis.
x = np.linspace(-1, 1, 100)
y = np.linspace(-1, 1, 100)
# Expand them into two 100x100 coordinate matrices.
xx, yy = np.meshgrid(x, y)
# Display both matrices. This replaces the unfinished `for i in range()`
# statement, which was a syntax error.
xx, yy
(array([[-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        ...,
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ]]),
 array([[-1.        , -1.        , -1.        , ..., -1.        ,
         -1.        , -1.        ],
        [-0.97979798, -0.97979798, -0.97979798, ..., -0.97979798,
         -0.97979798, -0.97979798],
        [-0.95959596, -0.95959596, -0.95959596, ..., -0.95959596,
         -0.95959596, -0.95959596],
        ...,
        [ 0.95959596,  0.95959596,  0.95959596, ...,  0.95959596,
          0.95959596,  0.95959596],
        [ 0.97979798,  0.97979798,  0.97979798, ...,  0.97979798,
          0.97979798,  0.97979798],
        [ 1.        ,  1.        ,  1.        , ...,  1.        ,
          1.        ,  1.        ]]))

1. 시점

  • 종점은 화살표로 표시해야 하므로 시점만 만들기
Code
import plotly.graph_objs as go
Code
# 1. prepare data

# Coordinates are stored as consecutive (start, end) pairs:
#   vector 1: start = index 0, end = index 1
#   vector 2: start = index 2, end = index 3
#   ... and so on, two entries per vector.
x = [10.1219, 10.42579, 15.21396, 15.42468, 20.29639, 20.46268, 25.36298, 25.49156]
y = [5.0545, 5.180104, 5.0545, 5.20337, 5.0545, 5.194271, 5.0545, 5.231627]
z = [5.2713, 5.231409, 5.2713, 5.231409, 5.2713, 5.235852, 5.2713, 5.231627]
# Equivalent explicit pairing: pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]

# Indices of the start points (every second entry).
list(range(0, len(x), 2))
[0, 2, 4, 6]
Code
# 2. trace, data (trace set)
# Only the start point of every vector is drawn: the even indices of x, y, z.
trace1 = go.Scatter3d(
    x=x[::2],
    y=y[::2],
    z=z[::2],
    mode="markers",
    # The colour goes on `marker`: a `line` setting is not drawn when the
    # trace is in markers-only mode, so the points never turned red.
    marker=dict(color="red"),
)
data = [trace1]

# 3. layout
layout = go.Layout(title=dict(text="vectors"))

# 4. figure
fig = go.Figure(data=data, layout=layout)
fig.show()

2. 선 만들기

Code
# 1. prepare data
x_lines = []
y_lines = []
z_lines = []

# In "lines" mode the trace connects consecutive points. After each
# (start, end) pair - i.e. after every odd index - a None is appended to all
# three lists so that no segment is drawn from one vector's end to the next
# vector's start.
for i, x_value in enumerate(x):
    x_lines.append(x_value)
    y_lines.append(y[i])
    z_lines.append(z[i])
    if i % 2:
        for axis_values in (x_lines, y_lines, z_lines):
            axis_values.append(None)

# 2. trace and trace set (= data)
trace2 = go.Scatter3d(
    x=x_lines,
    y=y_lines,
    z=z_lines,
    mode="lines",
    line={"width": 2, "color": 'rgb(255, 0,0)'},
)
data = [trace2]

# 3. layout
layout = go.Layout(title="lines")
# 4. figure
fig = go.Figure(data=data, layout=layout)
# 5. plotting
fig.show()
Code
# Intermediate check: start-point markers and line segments in one figure.
data = [trace1, trace2]

# 3. layout
layout = go.Layout(title="lines")
# 4. figure
fig = go.Figure(layout=layout, data=data)
# 5. plotting
fig.show()

3.종점 만들기

Code
# NOTE(review): this cell sits under the "end points" heading but only
# re-plots the markers and lines; no end-point (arrowhead) trace is built
# here - confirm whether that step is still to be written.
data = [trace1, trace2]

# 3. layout
layout = go.Layout(title="lines")
# 4. figure
fig = go.Figure(layout=layout, data=data)
# 5. plotting
fig.show()
Code
import plotly.graph_objs as go
# plotly.offline.init_notebook_mode()

# Coordinates stored as consecutive (start, end) pairs, one pair per vector.
x = [10.1219, 10.42579, 15.21396, 15.42468, 20.29639, 20.46268, 25.36298, 25.49156]
y = [5.0545, 5.180104, 5.0545, 5.20337, 5.0545, 5.194271, 5.0545, 5.231627]
z = [5.2713, 5.231409, 5.2713, 5.231409, 5.2713, 5.235852, 5.2713, 5.231627]

# (start index, end index) of every vector.
pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]

# Plot ONLY the first point (the start) of each pair.
trace1 = go.Scatter3d(
    x=[x[start] for start, _ in pairs],
    y=[y[start] for start, _ in pairs],
    z=[z[start] for start, _ in pairs],
    mode='markers',
    name='markers',
    # The colour goes on `marker`: a `line` setting is not drawn when the
    # trace is in markers-only mode, so the points never turned red.
    marker=dict(color='red'),
)

# Last expression of the cell, so the notebook displays the figure.
go.Figure(data=trace1)